We study data on Mongolia and Japan.
# Embed the web page at this URL in the knitted document. Only the URL is
# passed, and the printed object below reports a height of "400px".
knitr::include_url("https://icu-hsuzuki.github.io/science/index-j.html")
$url
[1] "https://icu-hsuzuki.github.io/science/index-j.html"
$height
[1] "400px"
$url.orig
[1] "https://icu-hsuzuki.github.io/science/index-j.html"
attr(,"class")
[1] "knit_embed_url" "knit_asis_url"
# Set the LANG environment variable to "en" for this R session
# (presumably so that messages are shown in English -- TODO confirm intent).
Sys.setenv(LANG = "en")
# tidyverse provides the readr, dplyr, tidyr and ggplot2 functions used below.
library(tidyverse)
# NOTE(review): WDI is attached here but not called in the lines visible in
# this section; presumably it is used elsewhere in the document.
library(WDI)
# CSV published on www.gender.go.jp (white paper "r03", table
# "zuhyo01-05-01" according to the URL path).
url_edu1 <- "https://www.gender.go.jp/about_danjo/whitepaper/r03/zentai/html/honpen/csv/zuhyo01-05-01.csv"
# download.file() does not create missing directories, so make sure the
# destination folder exists first; this is a no-op when "data" is already there.
dir.create("data", showWarnings = FALSE)
# Save a local copy that the following steps read from.
download.file(url_edu1, destfile = "data/edu1.csv")
trying URL 'https://www.gender.go.jp/about_danjo/whitepaper/r03/zentai/html/honpen/csv/zuhyo01-05-01.csv'
Content type 'text/csv' length 3220 bytes
==================================================
downloaded 3220 bytes
# Ask readr which text encodings are plausible for the downloaded file.
guess_encoding(file = "data/edu1.csv")
# Import the file as Shift-JIS text, ignoring its first two lines so that
# the third line supplies the column headers.
df_edu1 <- read_csv(
  file = "data/edu1.csv",
  skip = 2,
  locale = locale(encoding = "Shift-JIS")
)
Rows: 71 Columns: 10
── Column specification ───────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): 年度
dbl (9): 高等学校等(男子), 高等学校等(女子), 専修学校(専門課程,男子), 専修学校(専門課程,女子), 大学(学部,男子), 大学(学部,女子), 短期大学(本科,女子), 大学院...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the imported table, then list its column names
# (the headers are Japanese, as the output below shows).
df_edu1
colnames(df_edu1)
[1] "年度" "高等学校等(男子)" "高等学校等(女子)"
[4] "専修学校(専門課程,男子)" "専修学校(専門課程,女子)" "大学(学部,男子)"
[7] "大学(学部,女子)" "短期大学(本科,女子)" "大学院(男子)"
[10] "大学院(女子)"
# Inspect the year column: it is text holding Japanese era years
# (e.g. "昭和25", "平成元", "令和元"), not calendar years, which is why the
# later steps overwrite it with 1950:2020.
df_edu1$年度
[1] "昭和25" "26" "27" "28" "29" "30" "31" "32" "33" "34" "35"
[12] "36" "37" "38" "39" "40" "41" "42" "43" "44" "45" "46"
[23] "47" "48" "49" "50" "51" "52" "53" "54" "55" "56" "57"
[34] "58" "59" "60" "61" "62" "63" "平成元" "2" "3" "4" "5"
[45] "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" "16"
[56] "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27"
[67] "28" "29" "30" "令和元" "2"
# English-named copy of the imported table. Names are assigned by position:
# the year column first, then the male/female columns of each school type in
# the order reported by colnames(df_edu1).
df_edu0 <- setNames(
  df_edu1,
  c(
    "year",
    "highschool_m", "highschool_f",
    "vocational_m", "vocational_f",
    "university_m", "university_f",
    "juniorcol_f",
    "gradschool_m", "gradschool_f"
  )
)
# Overwrite the era-year labels with calendar years 1950-2020 (one per row)
# and add one combined series per school type: the plain mean of the male and
# female columns. Junior college has only a female column, so it is copied.
# NOTE(review): the unweighted mean equals the overall rate only when the male
# and female populations are the same size.
df_edu00 <- mutate(
  df_edu0,
  year = 1950:2020,
  highschool = (highschool_m + highschool_f) / 2,
  vocational = (vocational_m + vocational_f) / 2,
  university = (university_m + university_f) / 2,
  juniorcol = juniorcol_f,
  gradschool = (gradschool_m + gradschool_f) / 2
)
# Keep the year and the five combined series (i.e. drop the nine per-gender
# columns), reshape to one row per year and school type, and draw one line
# per school type.
df_edu00 %>%
  select(year, highschool, vocational, university, juniorcol, gradschool) %>%
  pivot_longer(
    cols = -year,
    names_to = "schools",
    values_to = "percentage"
  ) %>%
  ggplot(mapping = aes(x = year, y = percentage, color = schools)) +
  geom_line()
Warning: Removed 49 row(s) containing missing values (geom_path).
# Japanese-labelled version of the combined table: add a calendar-year column
# and one male/female mean per school type (junior college is female only),
# then keep just those six new columns and print the result.
df_edu11 <- df_edu1 %>%
  mutate(
    年 = 1950:2020,
    高等学校 = (`高等学校等(男子)` + `高等学校等(女子)`) / 2,
    専修学校 = (`専修学校(専門課程,男子)` + `専修学校(専門課程,女子)`) / 2,
    大学 = (`大学(学部,男子)` + `大学(学部,女子)`) / 2,
    短期大学 = `短期大学(本科,女子)`,
    大学院 = (`大学院(男子)` + `大学院(女子)`) / 2
  ) %>%
  select(年, 高等学校, 専修学校, 大学, 短期大学, 大学院)
df_edu11
# Line chart of the combined series with Japanese labels. No font family is
# set here.
# NOTE(review): whether the Japanese text renders presumably depends on the
# graphics device's default font -- confirm.
df_edu11 %>%
  pivot_longer(
    cols = -年,
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(mapping = aes(x = 年, y = 進学率, color = 学校)) +
  geom_line()
# Same chart, but with an explicit base font family for the plot text.
# NOTE(review): "HiraginoSans-W3" / "HiraKakuPro-W3" look like Hiragino
# (macOS) font names -- confirm they are available on the target platform.
df_edu11 %>%
  pivot_longer(
    cols = -年,
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(mapping = aes(x = 年, y = 進学率, color = 学校)) +
  geom_line() +
  theme_gray(base_family = "HiraginoSans-W3") # or base_family = "HiraKakuPro-W3"
Warning: Removed 49 row(s) containing missing values (geom_path).
# Plot all nine original series (male and female kept separate): add a
# calendar-year column, reshape every column except the two year columns to
# long form, and draw one line per original column.
df_edu1 %>%
  mutate(year = 1950:2020) %>%
  pivot_longer(
    cols = !c(年度, year),
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(mapping = aes(x = year, y = 進学率, color = 学校)) +
  geom_line() +
  theme_gray(base_family = "HiraKakuPro-W3") # or base_family = "HiraginoSans-W3"
Warning: Removed 94 row(s) containing missing values (geom_path).
#{r dev='ragg_png'} df_edu1 %>% mutate(year = 1950:2020) %>% pivot_longer(2:10, names_to = "学校", values_to = "進学率") %>% ggplot(aes(x = year, y = 進学率, color = 学校)) + geom_line()